
* Project root: fill in the folder that holds the data and the helper do-files.
global directory ""
* Every file reference below is built from this global, so stop early with a
* readable message when it has been left empty.
if `"$directory"' == "" {
	display as error "global directory is empty: set it to the replication folder before running"
	exit 198
}
* Make the project folder the working directory. The original issued -use- on
* the folder path, which is not a dataset and therefore always failed.
cd "$directory"
clear

* Load the event-level dataset (forward slashes work on every platform).
use "$directory/dataset_RESTAT.dta", clear
* creating measures for the importance of the game from teams' standing
/*
preserve
	do "$directory/creating standing_diff RESTAT Nov 2021.do"
restore
*/

* Rebuild matchobs so that it flags a single row per match.
drop matchobs
sort division season matchday hteam_id ateam_id minute
* dup is 0 when the match has a single row, otherwise the row's position within the match
quietly by division season matchday hteam_id ateam_id:  gen dup = cond(_N==1,0,_n)
* matchobs is 1 on the first (or only) row of each match and missing on every other row
gen matchobs=1 if dup==0 | dup==1
*** imputing away fans
* NOTE(review): the helper do-file is not visible here; predict_guest_fans, kept further down, is presumably created by it -- confirm
do "$directory/imputation guest fans RESTAT.do"

* take care of mistake order
* the ordering variable shipped with the data is kept under a new name and rebuilt below
rename mistake_order mistake_order_old
*drop mistake_order
sort division season matchday hteam_id ateam_id minute 
* number the rows with mistake==1 within each match (1, 2, ...); rows without a mistake stay missing
* NOTE(review): the numbering follows the row order egen uses inside each by-group; confirm that it matches minute order
egen mistake_order=seq() if mistake==1, by(division season matchday hteam_id ateam_id )

* Impute missing capacity and attendance from averages of the non-missing rows.
* capacity: average over all rows that share the same home team
egen capacity_temp = mean(capacity), by(hteam_id)
generate capacity_fill = cond(capacity == ., capacity_temp, capacity)
generate capacity_pten = capacity_fill / 10000

* spectators: average over all rows that share the same home/away pairing
egen spectators_temp = mean(spectators), by(hteam_id ateam_id)
generate spectators_fill = cond(spectators == ., spectators_temp, spectators)

* crowd = (filled) spectators divided by (filled) capacity of the stadium;
* rows where it cannot be computed are flagged and then set to the sample mean
generate crowd = spectators_fill / capacity_fill
generate crowd_missing = 1 if crowd == .
egen mean_crowd = mean(crowd)
replace crowd = mean_crowd if crowd == .

* discard raw columns that are not used below and order events within each match
drop event_club source eventduplicated-joined dupl ateam
sort division season matchday hteam_id ateam_id minute game_id

* define a unique game
egen game_unique_id = group(division season matchday hteam_id ateam_id)

* Events
* ======
* generating a dummy indicating whether the event happened to home or away team
* event codes: 1 Goal; 2 (missed) penalty; 3 Yellow card; 4 Double yellow card; 5 Red card; 6 Substitution; 7 goal (incorrectly given); 8 goal (missed)
* the yellow variable built further down also counts double yellow cards and red cards

* event_home_team is 1 when the event's club is the home team and 0 otherwise
gen event_home_team= (hteam_id==event_club_id)


* generating minutes to 1st-5th mistake
* =====================================
* rows with a missing mistakes value are removed before anything is counted
drop if mistakes==.
* min_mist1 ... min_mist5 hold the minute on the row of the 1st ... 5th mistake and are missing elsewhere
forvalues m=1/5 {
	gen min_mist`m'=minute if mistake_order==`m'
}

* advantage is a dummy which is equal to 1 if the mistake was in favor of home team and 2 if the mistake was in favor of away team
* note: (mistake) is true for any nonzero value, which in Stata includes missing
gen mistake_h= (mistake) & advantage==2 
* mistake against Home
gen mistake_a= (mistake) & advantage==1
* mistake against Away 

* same two dummies restricted to the row whose minute equals the first-mistake minute
gen mistake_h_1= (minute==min_mist1) & (mistake) & advantage==2
* first mistake, advantage==2
gen mistake_a_1= (minute==min_mist1) & (mistake) & advantage==1
* first mistake, advantage==1

* spread the first-mistake dummies to every row of the game
egen first_mistake_h=max(mistake_h_1), by(game_unique_id)
egen first_mistake_a=max(mistake_a_1), by(game_unique_id)

* number of mistakes of each kind per game
egen sum_mistake_h=sum(mistake_h), by(game_unique_id)
egen sum_mistake_a=sum(mistake_a), by(game_unique_id)

* replace the shipped mistakes count by the total recomputed from the two sums
drop mistakes
gen mistakes=sum_mistake_h+sum_mistake_a

* check for sequence of mistakes
* rows with a non-missing advantage are numbered within the game
egen seq_mistake=seq() if advantage~=., by(game_unique_id)

* generate a dummy notifying which team got the second mistake
gen mistake_h_2= seq_mistake==2 &  advantage==2
* second mistake, advantage==2
gen mistake_a_2= seq_mistake==2 & advantage==1
* second mistake, advantage==1
egen second_mistake_h=max(mistake_h_2), by(game_unique_id)
egen second_mistake_a=max(mistake_a_2), by(game_unique_id)

* gen a dummy for whom was the first mistake given
* max_advantage carries the advantage code of the row numbered 1 to all rows of the game; 0 when the game has none
gen max_advantage_t=advantage if seq_mistake==1
egen max_advantage=max(max_advantage_t), by(game_unique_id)
replace max_advantage=0 if max_advantage==.

* generate variable for minutes to first mistake. replace to 91 minutes if there was no mistake
sort  division season matchday hteam_id ateam_id minute game_id

egen min_mistake1=max(min_mist1), by(division season matchday hteam_id ateam_id)
replace min_mistake1=91 if min_mistake1==.
* max_seq_end is the largest game_id of the match; it is used later to keep one row per game
egen max_seq_end=max(game_id), by(division season matchday hteam_id ateam_id)

* same game-level first-mistake minute without the 91 replacement (stays missing when there is no mistake)
egen min_mistake1_fixed=max(min_mist1), by(game_unique_id)

* ===============================================
* defining Yellow cards -- the dependent variable
* ===============================================
* yellow is 1 for a yellow or double yellow card (event 3 or 4), 2 for a red card (event 5), 0 otherwise
gen yellow= (event==3 | event==4 | event==5)
replace yellow= 2 if event==5

* check whether yellow cards come at the same minute as a mistake -- and replace the yellow equal 1 with 0
sort division season matchday hteam_id ateam_id game_id
* minutes elapsed since the previous row, defined only when the previous row carries the preceding game_id
gen diff_minutes=minute-minute[_n-1] if game_id-game_id[_n-1]==1
* the variable is referenced by its full name (the original relied on the abbreviation diff_minute,
* which stops working under -set varabbrev off- or once another diff_minute* variable exists)
gen delete_yellow=1 if yellow==1 & diff_minutes==0 & mistake[_n-1]==1 & game_id-game_id[_n-1]==1
replace yellow=0 if delete_yellow==1
* generating dummies of Yellow cards to home vs away team
* yellow_h is defined on home-team events only and yellow_a on away-team events only; each is missing on the other team's rows
gen yellow_h=yellow if event_home_team==1
replace yellow_h=0 if event_home_team==1 & yellow_h==.
gen yellow_a=yellow if event_home_team==0
replace yellow_a=0 if event_home_team==0 & yellow_a==.

* ===========================================================================
* Yellow cards before and from the first mistake, per team.
* max_yellow_S_W: S = 1 home team / 0 away team, W = 0 before (minute < first
* mistake) / 1 from the first mistake onwards (minute >= first mistake)
* ===========================================================================
local team1 "h"
local team0 "a"
local op0 "<"
local op1 ">="
foreach when in 0 1 {
	foreach side in 1 0 {
		local running cum_yellow_`side'_`when'
		local total   max_yellow_`side'_`when'
		* running count over the qualifying rows; its game maximum is the total
		bysort game_unique_id: generate `running' = sum(yellow_`team`side'') if minute `op`when'' min_mistake1
		egen `total' = max(`running'), by(game_unique_id)
		replace `total' = 0 if `total' == .
	}
}
drop cum_yellow_1_0 cum_yellow_0_1 cum_yellow_1_1 cum_yellow_0_0

* game totals per team, and the game total of the yellow variable over all rows
generate max_yellow_h = max_yellow_1_0 + max_yellow_1_1
generate max_yellow_a = max_yellow_0_0 + max_yellow_0_1
egen max_yellow = sum(yellow), by(game_unique_id)

* sum max_yellow_1_0 max_yellow_0_0 max_yellow_1_1 max_yellow_0_1 if matchobs==1

* ==============================================
* Yellow cards in each half, per team.
* max_yellow_half_S_H: S = 1 home team / 0 away team, H = 0 first half
* (minute <= 45) / 1 second half (minute > 45)
* ==============================================
forvalues half = 1/2 {
	local sfx = `half' - 1
	if `half' == 1 {
		local inside  "minute<=45"
		local outside "minute>45"
	}
	else {
		local inside  "minute>45"
		local outside "minute<=45"
	}
	foreach team in h a {
		local homeflag = ("`team'" == "h")
		local running cum_yellow_`team'_half`half'
		local total   max_yellow_half_`homeflag'_`sfx'
		* running count inside the half, zero on rows of the other half
		bysort game_unique_id: generate `running' = sum(yellow_`team') if `inside'
		replace `running' = 0 if `outside'
		egen `total' = max(`running'), by(game_unique_id)
		replace `total' = 0 if `total' == .
	}
}

drop cum_yellow_h_half1 cum_yellow_a_half1 cum_yellow_h_half2 cum_yellow_a_half2
* ===========================

* generating dummies for home & away team goals + counting number of goals per game
* ====================================================================
gen goal_h= (event==1 & event_home_team==1)
gen goal_a= (event==1 & event_home_team==0)
* running goal counts follow game_id order within the game
sort game_unique_id game_id
by game_unique_id: gen cum_hgoal=sum(goal_h)
by game_unique_id: gen cum_agoal=sum(goal_a)
sort division season matchday hteam_id ateam_id minute game_id

* the diff in goals (home minus away) after each event row
gen diff_goal_1_1=cum_hgoal-cum_agoal

* count number of goals
gen goal= (event==1)

* goals is already constant within the game, so max_goals equals goals
egen goals=sum(goal), by(game_unique_id)
egen max_goals=max(goals), by(game_unique_id)
drop event_home_team
* create diff in score in the end of the 1st half and at the first mistake
* diff_ha: largest score difference among the rows whose minute equals the first-mistake minute
gen diff_ha_temp=diff_goal_1_1 if (minute == min_mistake1)
egen diff_ha=max(diff_ha_temp), by(game_unique_id)
drop diff_ha_temp

* keep diff_min only on the rows of the latest minute that is not after minute 45
gen diff_min=(minute-45) if minute-45<=0
egen max_diff_min=max(diff_min), by(game_unique_id)
replace diff_min=. if diff_min~=max_diff_min

* number those rows and find the last of them
egen seq=seq() if diff_min~=., by(game_unique_id diff_min)
egen max_seq=max(seq) if diff_min~=., by(game_unique_id diff_min)

* define the diff in goals at the end of the half and when the mistake was done
* diff_ha_half: score difference on the last numbered row above, 0 when there is no such row,
* and overwritten by diff_ha for every game with at least one mistake
gen diff_ha_half_temp=diff_goal_1_1 if max_seq==seq & seq~=.
egen diff_ha_half=max(diff_ha_half_temp), by(game_unique_id)
replace diff_ha_half=0 if diff_ha_half==.
replace diff_ha_half=diff_ha if mistakes>0

* diff_ha itself is dropped here; only diff_ha_half survives
drop diff_ha_half_temp diff_min seq max_seq diff_ha max_diff_min

sort division season matchday hteam_id ateam_id minute game_id
* ===========================================================
* Descriptive counts on referees, computed on one row per match.
* The data are restored afterwards, so nothing created here survives.
preserve
egen seq_game = seq(), by(division season matchday hteam_id ateam_id)
keep if seq_game == 1

* one row per referee: how many distinct referees there are
egen seq_referee = seq(), by(ref_id)
tabulate seq_referee if seq_referee == 1

* one row per referee-season: referees active in each season
egen seq_referee_season = seq(), by(ref_id season)
egen unique_season = total(seq_referee_season) if seq_referee_season == 1, by(season)
summarize unique_season

* matches per referee and season (every remaining row already has seq_game equal to 1)
egen num_ref = count(season), by(ref_id season)
summarize num_ref

restore

* ==========================================
* === Begin creating Tables 2,6 and 7 ======
* ==========================================
* Tables 2, 6 and 7 are produced by a separate do-file run on the event-level data;
* preserve/restore keeps whatever it changes out of the data used below.
preserve
	do "$directory/Tables 2 6 and 7 RESTAT Nov 2021.do"
restore
* ==========================================
* === End creating Tables 2,6 and 7 ========
* ==========================================
* collapse to game level: keep the row whose game_id is the largest of its match
keep if (max_seq_end==game_id)
drop game_id

* Variables carried into the game-level analysis. diff_ha is not listed: it was
* dropped above, and the original entry only resolved as an abbreviation of
* diff_ha_half. Names that the original listed twice appear once.
keep game_unique_id ateam_id hteam_id ///
	max_yellow_1_0 max_yellow_1_1 max_yellow_0_0 max_yellow_0_1 ///
	max_yellow_half_1_0 max_yellow_half_1_1 max_yellow_half_0_0 max_yellow_half_0_1 ///
	h_shotson_wt- a_corners_k ///
	max_yellow max_goals ref_id division season year diff_ha_half matchday ///
	hgoals agoals max_advantage first_mistake_h first_mistake_a second_mistake_h second_mistake_a ///
	sum_mistake_h sum_mistake_a min_mistake1 ///
	predict_guest_fans spectators_fill capacity_fill capacity_pten crowd dist_places runningtrack ///
	a_standing h_standing VAR no_spectators refed_games ref_home_town ref_home_verband ref_height ref_weight ///
	ateam_standing_last_season hteam_standing_last_season ateam_division_last_season hteam_division_last_season

	
* mistake1 is a dummy equal to 1 if the first mistake has advantage code 2 (the code used for mistake_h above)
gen mistake1= (max_advantage==2)
* mistake0 is a dummy equal to 1 if the first mistake has advantage code 1 (the code used for mistake_a above)
gen mistake0= (max_advantage==1)

* check for duplicates before reshaping
* any repeated game_unique_id is reduced to a single row; tag is created only to be dropped
duplicates tag game_unique_id, gen(tag)
duplicates drop game_unique_id, force
drop tag 

* first reshape: one row per game and period; the trailing digit of each stub becomes after (0/1)
* NOTE(review): no max_sub_* variable is created or kept in the code visible here; confirm how reshape treats these stubs
reshape long max_yellow_1_ max_yellow_0_ max_yellow_half_1_ max_yellow_half_0_ max_sub_1_ max_sub_0_ max_sub_half_1_ max_sub_half_0_, i(game_unique_id) j(after)

* strip the trailing underscore so the remaining digit can serve as the next reshape index
rename max_yellow_1_ max_yellow_1
rename max_yellow_0_ max_yellow_0
rename max_yellow_half_1_ max_yellow_half_1
rename max_yellow_half_0_ max_yellow_half_0

rename max_sub_1_ max_sub_1
rename max_sub_0_ max_sub_0
rename max_sub_half_1_ max_sub_half_1
rename max_sub_half_0_ max_sub_half_0

* total mistakes per game, and the mistakes other than the first one split into the _0 and _1 slots
gen mistakes=sum_mistake_h+sum_mistake_a
gen other_mistakes_0=sum_mistake_a if max_advantage==2
gen other_mistakes_1=sum_mistake_h-1 if max_advantage==2

replace other_mistakes_0=sum_mistake_h if max_advantage==1
replace other_mistakes_1=sum_mistake_a-1 if max_advantage==1

* identifier of the game-period row, used as the i() of the second reshape
egen game_unique_id_after=group(game_unique_id after)

duplicates tag game_unique_id_after, gen(tag)
duplicates drop game_unique_id_after, force
drop tag 

* second reshape: one row per game, period and team; the remaining digit becomes home (0/1)
reshape long max_sub_ max_sub_half_ max_yellow_ max_yellow_half_ other_mistakes_ mistake , i(game_unique_id_after) j(home)
rename max_yellow_ yellow
rename other_mistakes_ other_mistakes
rename max_yellow_half_ max_yellow_half

rename max_sub_ substitution
rename max_sub_half_ max_sub_half

* correct other mistakes to be equal to 0 for the period before
replace other_mistakes=0 if after==0

* pairwise interactions of the team, period and first-mistake dummies
gen home_after=home*after
gen mistake_after=mistake*after
gen home_mistake=home*mistake
* create triple interaction
gen home_mistake_after=home*mistake*after

* 1 when the first mistake falls after minute 45 and before minute 90, 0 when it falls by minute 45,
* missing otherwise. min_mistake1 is written out in full (the original used the abbreviation
* min_mistake, which breaks under -set varabbrev off-).
gen mistake_second_half=1 if (min_mistake1>45 & min_mistake1<90)
replace mistake_second_half=0 if (min_mistake1<=45)

* outcome: the before/after-mistake count for games with a mistake, the per-half count otherwise
gen all_yellow = yellow if mistakes>0
replace all_yellow = max_yellow_half if all_yellow==.

* HM: some row of the game has home_mistake equal to 1; AM: the game has a mistake and HM is 0
egen HM=max(home_mistake), by(game_unique_id)
gen AM= (mistakes>=1 & HM==0)

* generate yellow exposure to time.
sort game_unique_id after home
* seq_game numbers the rows of each game; seq_game==1 is used below to pick one row per game
egen seq_game=seq(), by(game_unique_id)

* define tight game
* diff_0: level score at the reference point; diff_1: score difference of at most one goal
gen diff_0= (diff_ha_half==0)
gen diff_1= (diff_ha_half>=-1 & diff_ha_half<=1)
* ======================================================
* Defining game importance
* ======================================================
rename matchday matches
* for each row merge the standing of the home and the standing of the away team ==> two merges -- one by the home and the other by the away
* first merge
* -----------
* team_id is hteam_id on home==1 rows and ateam_id on home==0 rows, i.e. the row's own team,
* so the home_ prefix assigned below marks the row's own team rather than the home club on every row
gen team_id= (home*hteam_id +(1-home)*ateam_id)
* NOTE(review): m:m pairs rows by position within the key; if the standings file has one row per key, m:1 states that explicitly -- confirm
merge m:m division season team_id matches using "$directory/Standings_diff_RESTAT"
drop if _merge==2
rename _merge current_team_merge

foreach var in standing wins draws losses goals goal_diff points diff_points_p diff_points_m {
	rename `var'  home_`var'
}
drop team_id
* second merge
* -----------
* team_id is now the opposing team of the row; the path uses a forward slash like the first merge
* (the original used a backslash here, which only works on Windows)
gen team_id= ((1-home)*hteam_id +home*ateam_id)
merge m:m division season team_id matches using "$directory/Standings_diff_RESTAT"
* Check!!!
drop if _merge==2
rename _merge current_team_merge2

foreach var in standing wins draws losses goals goal_diff points diff_points_p diff_points_m {
	rename `var'  away_`var'
}
* ---------------------------------------
* generating the importance of the game first separately for each team
* criterion 1: standing within the top 6 or at 13th place and below (the same rule is applied to both divisions)
* NOTE(review): these are evaluated on seq_game==1 rows only; confirm which team the home_ and away_ prefixes denote on that row
gen import_h1= home_standing<=6 | home_standing>=13 if seq_game==1 & division==1
replace import_h1= home_standing<=6 | home_standing>=13 if seq_game==1 & division==2
gen import_a1= away_standing<=6 | away_standing>=13 if seq_game==1 & division==1
replace import_a1= away_standing<=6 | away_standing>=13 if seq_game==1 & division==2

* criterion 2: the smaller of the two point gaps is below 3
gen import_h2= (min(home_diff_points_p, abs(home_diff_points_m))<3) if seq_game==1
gen import_a2= (min(away_diff_points_p, abs(away_diff_points_m))<3) if seq_game==1

* a team's game is important when both criteria hold
gen import_h_temp = (import_h1==1 & import_h2==1) if seq_game==1
gen import_a_temp = (import_a1==1 &  import_a2==1) if seq_game==1

* spread the flags to every row of the game
egen import_h=max(import_h_temp), by(game_unique_id)
egen import_a=max(import_a_temp), by(game_unique_id)

* an index for the importance of the game combining the importance of both teams
* NOTE(review): the coding is asymmetric (only import_a==1 gives 1, only import_h==1 gives 2) -- confirm this is intended
gen import_grp = 0 if import_h==0 & import_a==0
replace import_grp = 1 if import_h==0 & import_a==1
replace import_grp = 2 if import_h==1 & import_a==0
replace import_grp = 2 if import_h==1 & import_a==1

label define l_import_grp 0 "Low" 1 "Mid" 2 "High" 
label values import_grp l_import_grp
* ======================================================

*====================================================
* Analysis variables: sample periods and exposure weights
*====================================================
* no_spectators is rebuilt from the calendar: season 19 from match 26 onwards, and all of season 20
rename no_spectators no_spectators_old
generate no_spectators = (season == 20) | (season == 19 & matches >= 26)

* the three periods: 1 = VAR is 0; 2 = VAR is 1 with spectators; 3 = VAR is 1 without spectators
generate period = cond(VAR == 0, 1, ///
	cond(VAR == 1 & no_spectators == 0, 2, ///
	cond(VAR == 1 & no_spectators == 1, 3, .)))
tabulate period if seq_game == 1

* exposure weights in units of 45 minutes: time up to the first mistake for the
* before rows, time from the first mistake to minute 90 for the after rows, and
* 1 for games without a mistake
generate weights = cond(mistakes == 0, 1, ///
	cond(mistakes > 0 & after == 0, min_mistake1/45, ///
	cond(mistakes > 0 & after == 1, (90-min_mistake1)/45, .)))

* outcome per unit of exposure, winsorized at 1% and at 2.5% in each tail
generate all_yellow_weights = all_yellow/weights
winsor all_yellow_weights, gen(all_yellow_weights_w) p(0.01)
winsor all_yellow_weights, gen(all_yellow_weights_w25) p(0.025)

* game totals of yellow cards: games without a mistake against games with exactly one
ttest max_yellow if seq_game==1 & mistakes<=1, by(mistakes)

* generating interactions
* =======================
* Two and three way interactions
* HM / AM with the team and period dummies and with the number of mistakes
gen HM_home=HM*home
gen HM_after=HM*after
gen HM_mistakes=HM*mistakes
gen HM_home_after=HM*home*after

gen AM_home=AM*home
gen AM_after=AM*after
gen AM_mistakes=AM*mistakes
gen AM_home_after=AM*home*after

gen home_mistakes=home*mistakes
gen home_mistakes_after=home*mistakes*after

gen mistakes_after=mistakes*after
gen home_after_HM=home*after*HM
gen home_mistakes_HM=home*mistakes*HM
gen after_mistakes_HM=after*mistakes*HM
gen home_after_mistakes_HM=home*after*mistakes*HM
gen mistake_home_after = mistake*home*after

* labeling the variables
label var home "Home Team"
label var after "After"
label var home_after "Home x After"
label var HM "Home Mistake"
label var AM "Away Mistake"
label var HM_home "Home Mistake x Home Team"
* AM_home is AM*home, so its label names the home team (the original label said "Away Team")
label var AM_home "Away Mistake x Home Team"
label var HM_after "Home Mistake x After"
label var AM_after "Away Mistake x After"
label var HM_home_after "Home Mistake x Home Team x After"
label var AM_home_after "Away Mistake x Home Team x After"
label var capacity_pten "Stadium Size (per 10,000)"
label var crowd "Crowding"
* end labeling

* how many unique referees
egen seq_ref=seq(), by(ref_id)
* tab seq_ref
* check how many unique referees per year
egen seq_ref_year=seq(), by(ref_id year)

* game totals of the yellow outcome per team (summed over the before and after rows),
* spread to every row of the game
egen ayellow_temp=sum(yellow) if home==0, by(game_unique_id)
egen hyellow_temp=sum(yellow) if home==1, by(game_unique_id)
egen ayellow=max(ayellow_temp), by(game_unique_id)
egen hyellow=max(hyellow_temp), by(game_unique_id)
* yellows: the total of the row's own team
gen yellows=ayellow if home==0
replace yellows=hyellow if home==1

* game-level totals, defined on one row per game; the a- and h- prefixed versions feed the Table 1 loops
gen allyellow=ayellow+hyellow if seq_game==1
gen allgoals=agoals+hgoals if seq_game==1
gen allsum_mistake=sum_mistake_h+sum_mistake_a if seq_game==1

gen asum_mistake = sum_mistake_a 
gen hsum_mistake = sum_mistake_h 

* unique referees and number of games per referee
egen count=count(ref_id) if seq_game ==1, by(ref_id)

* =======================================
* ============== TABLES =================
* =======================================

* =======================================
* ===== BEGINING OF TABLE 1 =============
* =======================================
* Table 1a - pre VAR
* ==================
preserve
keep if VAR==0
matrix define ttest_summary = J(14,5,.)
* rows 1-12 come in (mean, sd) pairs; row always points at the mean row of the current pair
local row = 1
foreach var in goals sum_mistake yellow {
	local sdrow = `row' + 1

	* column 1: game total; columns 2-5: away mean/sd, home mean/sd, difference and p-value
	summarize all`var' if seq_game==1
	matrix ttest_summary[`row',1]   = round(r(mean), 0.01)
	matrix ttest_summary[`sdrow',1] = round(r(sd), 0.01)
	ttest a`var'=h`var' if seq_game==1
	matrix ttest_summary[`row',2]   = round(r(mu_1), 0.01)
	matrix ttest_summary[`sdrow',2] = round(r(sd_1), 0.01)
	matrix ttest_summary[`row',3]   = round(r(mu_2), 0.01)
	matrix ttest_summary[`sdrow',3] = round(r(sd_2), 0.01)

	matrix ttest_summary[`row',4] = round(r(mu_1)-r(mu_2), 0.01)
	matrix ttest_summary[`row',5] = round(r(p), 0.01)

	local row = `row' + 2
}
* game-level characteristics: mean and sd only
foreach var in spectators_fill crowd count {
	local sdrow = `row' + 1

	summarize `var' if seq_game==1
	matrix ttest_summary[`row',1]   = round(r(mean), 0.01)
	matrix ttest_summary[`sdrow',1] = round(r(sd), 0.01)
	local row = `row' + 2
}

* last two rows: number of rows on which each sequence variable equals 1
foreach var in seq_ref seq_game {
	summarize `var' if `var'==1
	matrix ttest_summary[`row',1] = r(N)
	local row = `row' + 1
}

matrix colnames ttest_summary = "Average" "Away Team" "Home Team" diff P_value
matrix rownames ttest_summary =  "No Goals" sd "No Mistakes" sd "No Yellow Cards" sd "Stadium Size" sd "Crowd %" sd "No Games by Referee" sd "No Unique Referees" "No Games"

esttab matrix(ttest_summary), mlabels("Average" "Away Team" "Home Team" "diff" "P_value") nonotes modelwidth(9) title(Summary Statistics) label
* number of games with a mistake, by season and by kind of mistake
tabulate season if seq_game ==1 & mistakes >0 
tabulate season if seq_game ==1 & HM >0 
tabulate season if seq_game ==1 & AM >0 

tabulate season if seq_game ==1 & sum_mistake_h >0 
tabulate season if seq_game ==1 & sum_mistake_a >0 

* minutes to the first mistake, overall and by HM / AM
summarize min_mistake1 if seq_game==1 & mistakes>0
summarize min_mistake1 if seq_game==1 & HM==1
summarize min_mistake1 if seq_game==1 & AM==1
ttest min_mistake1 if seq_game==1 & mistakes>0, by(HM)

restore

* Table 1b - post VAR with Crowd
* ==============================
preserve
keep if VAR==1
keep if no_spectators==0

matrix define ttest_summary = J(14,5,.)
* rows 1-12 come in (mean, sd) pairs; row always points at the mean row of the current pair
local row = 1
foreach var in goals sum_mistake yellow {
	local sdrow = `row' + 1

	* column 1: game total; columns 2-5: away mean/sd, home mean/sd, difference and p-value
	summarize all`var' if seq_game==1
	matrix ttest_summary[`row',1]   = round(r(mean), 0.01)
	matrix ttest_summary[`sdrow',1] = round(r(sd), 0.01)
	ttest a`var'=h`var' if seq_game==1
	matrix ttest_summary[`row',2]   = round(r(mu_1), 0.01)
	matrix ttest_summary[`sdrow',2] = round(r(sd_1), 0.01)
	matrix ttest_summary[`row',3]   = round(r(mu_2), 0.01)
	matrix ttest_summary[`sdrow',3] = round(r(sd_2), 0.01)

	matrix ttest_summary[`row',4] = round(r(mu_1)-r(mu_2), 0.01)
	matrix ttest_summary[`row',5] = round(r(p), 0.01)

	local row = `row' + 2
}
* game-level characteristics: mean and sd only
foreach var in spectators_fill crowd count {
	local sdrow = `row' + 1

	summarize `var' if seq_game==1
	matrix ttest_summary[`row',1]   = round(r(mean), 0.01)
	matrix ttest_summary[`sdrow',1] = round(r(sd), 0.01)
	local row = `row' + 2
}

* last two rows: number of rows on which each sequence variable equals 1
foreach var in seq_ref seq_game {
	summarize `var' if `var'==1
	matrix ttest_summary[`row',1] = r(N)
	local row = `row' + 1
}

matrix colnames ttest_summary = "Average" "Away Team" "Home Team" diff P_value
matrix rownames ttest_summary =  "No Goals" sd "No Mistakes" sd "No Yellow Cards" sd "Stadium Size" sd "Crowd %" sd "No Games by Referee" sd "No Unique Referees" "No Games"

esttab matrix(ttest_summary), mlabels("Average" "Away Team" "Home Team" "diff" "P_value") nonotes modelwidth(9) title(Summary Statistics) label

* number of games with a mistake, by season and by kind of mistake
tabulate season if seq_game ==1 & mistakes >0 
tabulate season if seq_game ==1 & HM >0 
tabulate season if seq_game ==1 & AM >0 

tabulate season if seq_game ==1 & sum_mistake_h >0 
tabulate season if seq_game ==1 & sum_mistake_a >0 

* minutes to the first mistake, overall and by HM / AM
summarize min_mistake1 if seq_game==1 & mistakes>0
summarize min_mistake1 if seq_game==1 & HM==1
summarize min_mistake1 if seq_game==1 & AM==1
ttest min_mistake1 if seq_game==1 & mistakes>0, by(HM)

restore

* Table 1c - post VAR without Crowd
* =================================
preserve
keep if no_spectators==1
keep if VAR==1
matrix define ttest_summary = J(14,5,.)
* rows 1-12 come in (mean, sd) pairs; row always points at the mean row of the current pair
local row = 1
foreach var in goals sum_mistake yellow {
	local sdrow = `row' + 1

	* column 1: game total; columns 2-5: away mean/sd, home mean/sd, difference and p-value
	summarize all`var' if seq_game==1
	matrix ttest_summary[`row',1]   = round(r(mean), 0.01)
	matrix ttest_summary[`sdrow',1] = round(r(sd), 0.01)
	ttest a`var'=h`var' if seq_game==1
	matrix ttest_summary[`row',2]   = round(r(mu_1), 0.01)
	matrix ttest_summary[`sdrow',2] = round(r(sd_1), 0.01)
	matrix ttest_summary[`row',3]   = round(r(mu_2), 0.01)
	matrix ttest_summary[`sdrow',3] = round(r(sd_2), 0.01)

	matrix ttest_summary[`row',4] = round(r(mu_1)-r(mu_2), 0.01)
	matrix ttest_summary[`row',5] = round(r(p), 0.01)

	local row = `row' + 2
}
* game-level characteristics: mean and sd only
foreach var in spectators_fill crowd count {
	local sdrow = `row' + 1

	summarize `var' if seq_game==1
	matrix ttest_summary[`row',1]   = round(r(mean), 0.01)
	matrix ttest_summary[`sdrow',1] = round(r(sd), 0.01)
	local row = `row' + 2
}

* last two rows: number of rows on which each sequence variable equals 1 (output suppressed here)
foreach var in seq_ref seq_game {
	quietly summarize `var' if `var'==1
	matrix ttest_summary[`row',1] = r(N)
	local row = `row' + 1
}

matrix colnames ttest_summary = "Average" "Away Team" "Home Team" diff P_value
matrix rownames ttest_summary =  "No Goals" sd "No Mistakes" sd "No Yellow Cards" sd "Stadium Size" sd "Crowd %" sd "No Games by Referee" sd "No Unique Referees" "No Games"
matrix list ttest_summary

esttab matrix(ttest_summary), mlabels("Average" "Away Team" "Home Team" "diff" "P_value") nonotes modelwidth(9) title(Summary Statistics) label
* number of games with a mistake, by season and by kind of mistake
tabulate season if seq_game ==1 & mistakes >0 
tabulate season if seq_game ==1 & HM >0 
tabulate season if seq_game ==1 & AM >0 

tabulate season if seq_game ==1 & sum_mistake_h >0 
tabulate season if seq_game ==1 & sum_mistake_a >0 

* minutes to the first mistake, overall and by HM / AM
summarize min_mistake1 if seq_game==1 & mistakes>0
summarize min_mistake1 if seq_game==1 & HM==1
summarize min_mistake1 if seq_game==1 & AM==1
ttest min_mistake1 if seq_game==1 & mistakes>0, by(HM)

restore
* =======================================
* ===== END OF TABLE 1 ==================
* =======================================

* =======================================
* ===== =======TABLE 2 ==================
* =======================================
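* produced earlier in this file by the call to "Tables 2 6 and 7 RESTAT Nov 2021.do" (section "Begin creating Tables 2,6 and 7")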
* =======================================
* ===== END OF TABLE 2 ==================
* =======================================

* =======================================
* ===== =======TABLE 3 ==================
* =======================================

eststo clear
* Table 3: for each period, the game fixed-effects regression on all games
* and on games whose first mistake falls before minute 85 (or that have none).
* The specification is shared by every column.
local t3spec all_yellow_weights_w home after home_after HM AM HM_home AM_home HM_after AM_after HM_home_after AM_home_after crowd capacity_pten
forvalues int=1(1)3 {
preserve
	keep if period==`int'
	eststo: qui: reghdfe `t3spec', absorb(game_unique_id) cluster(game_unique_id ref_id)
	* mean of the dependent variable over the estimation sample, reported in the table footer
	qui: sum all_yellow_weights_w if e(sample)
	scalar MeanF=r(mean) 
	estadd scalar Mean=MeanF
	* min_mistake1 is written out in full (the original relied on the abbreviation min_mistake)
	keep if (min_mistake1<85 | mistakes==0)
	eststo: qui: reghdfe `t3spec', absorb(game_unique_id) cluster(game_unique_id ref_id)
	qui: sum all_yellow_weights_w if e(sample)
	scalar MeanF=r(mean) 
	estadd scalar Mean=MeanF
restore 
}
esttab using table3.tex, replace ///
	b(%9.3f) modelwidth(13) stats(N r2 Mean,fmt(%6.0fc %5.4f %5.4f)) varwidth(25) se star(t 0.15 * 0.1 ** 0.05 *** 0.01) ///
	order(home after home_after HM_home AM_home HM_after AM_after HM_home_after AM_home_after) label ///
	keep(home after home_after HM_home AM_home HM_after AM_after HM_home_after AM_home_after) ///
	mlabels("All Games" "Minutes<85" "All Games" "Minutes<85" "All Games" "Minutes<85" ) nonotes ///
	addnotes("Robust standard errors are reported in parentheses.") ///
	title("Yellow Cards and Before/After A Mistake." ) 

* =======================================
* ===== END OF TABLE 3 ==================
* =======================================

* =======================================
* ============ TABLE 4 ==================
* =======================================

* Summary of characteristics of the game -- whether games become more or less aggressive as a result of no crowd
* Layout of summary_mistake_diff: one row per game statistic, three columns per period
* (mean over games, difference in means between the two groups of mistake, p-value of that difference).
matrix define summary_mistake_diff = J(20,9,.)

* the 20 game statistics; in the data each carries the suffix _k
local t4stats h_shots a_shots h_distance a_distance h_succpasses a_succpasses h_failpasses a_failpasses h_posession a_posession h_tackles a_tackles h_fouls a_fouls h_fouled a_fouled h_offside a_offside h_corners a_corners

forvalues p = 1/3 {
	* columns of this period inside the matrix
	local colmean = 3*(`p'-1) + 1
	local coldiff = 3*(`p'-1) + 2
	local colp    = 3*(`p'-1) + 3

	preserve
	foreach var of local t4stats {
		rename `var'_k `var'
	}
	keep if period==`p'

	* overall means, one row per game
	tabstat `t4stats' if seq_game==1, stat(mean)  save
	return list
	forvalue i=1(1)20 {
		matrix summary_mistake_diff[`i',`colmean']=r(StatTotal)[1,`i']
	}

	* difference in means: second group of mistake minus the first
	tabstat `t4stats' if seq_game==1, stat(mean) by(mistake) save
	return list
	forvalue i=1(1)20 {
		matrix summary_mistake_diff[`i',`coldiff']=r(Stat2)[1,`i']-r(Stat1)[1,`i']
	}

	* p-value of the two-group comparison for each statistic
	local int=1
	foreach var of local t4stats {
		qui: ttest 	`var' if seq_game==1 , by(mistake)
		matrix summary_mistake_diff[`int',`colp']=round(r(p),0.001)

		local int=`int'+1
	}
	restore
}

* the third p-value column is named P_value like the other two (the original had an unquoted P-value),
* and the row names no longer contain the stray double quote that followed a_distance
matrix colnames summary_mistake_diff = "Average" "Diff pre-VAR"  P_value "Average" "Diff VAR"  P_value "Average" "Diff VAR/no-Crowd" P_value
matrix rownames summary_mistake_diff =  `t4stats'
matrix list summary_mistake_diff, format(%9.3f) 

* =======================================
* ===== END OF TABLE 4 ==================
* =======================================


* =======================================
* ============ TABLE 5 ==================
* =======================================
* Table 5: period 1 only, games whose first mistake falls before minute 85 (or that have none);
* columns by game-importance group and by closeness of the score.
preserve
eststo clear
* min_mistake1 is written out in full (the original relied on the abbreviation min_mistake)
keep if (min_mistake1<85 | mistakes==0)
keep if period==1
* not many games with no_spectators
* keep if no_spectators==1
* regressors shared by every column
local t5rhs home after home_after HM AM HM_home AM_home HM_after AM_after HM_home_after AM_home_after
* columns 1-3: one regression per value of import_grp (0, 1, 2), with the crowd and capacity controls
forvalue int=0(1)2 {
	eststo: qui: reghdfe all_yellow_weights_w `t5rhs' crowd capacity_pten if import_grp==`int', absorb(game_unique_id) cluster(game_unique_id ref_id)
	* mean of the dependent variable over the estimation sample
	qui: sum all_yellow_weights_w if e(sample)
	scalar MeanF=r(mean) 
	estadd scalar Mean=MeanF	
}
* columns 4-5: diff_1 equal to 0, then diff_1 equal to 1, without the crowd and capacity controls
foreach close in 0 1 {
	eststo: qui: reghdfe all_yellow_weights_w `t5rhs' if diff_1==`close', absorb(game_unique_id) cluster(game_unique_id ref_id)
	qui: sum all_yellow_weights_w if e(sample)
	scalar MeanF=r(mean) 
	estadd scalar Mean=MeanF
}

esttab using table5.tex, replace   ///
	b(%9.3f) modelwidth(20) stats(N r2 Mean,fmt(%6.0fc %5.4f %5.4f)) varwidth(35) se star(t 0.15 * 0.1 ** 0.05 *** 0.01) ///
	order(home after home_after HM_home AM_home HM_after AM_after HM_home_after AM_home_after) label ///
	keep(home after home_after HM_home AM_home HM_after AM_after HM_home_after AM_home_after) ///
	mlabels("Low" "Mild" "High" "Not Close" "Close") nonotes /// 
	addnotes("Robust standard errors are reported in parentheses.") ///
	title("Yellow Cards Home/Away Team, Before/After A Mistake t<=85 for Tight Games" ) 
restore

* =======================================
* ===== END OF TABLE 5 ==================
* =======================================


* =======================================
* ===== =======TABLE 6 ==================
* =======================================
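* produced earlier in this file by the call to "Tables 2 6 and 7 RESTAT Nov 2021.do" (section "Begin creating Tables 2,6 and 7")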
* =======================================
* ===== END OF TABLE 6 ==================
* =======================================

* =======================================
* ===== =======TABLE 7 ==================
* =======================================
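* produced earlier in this file by the call to "Tables 2 6 and 7 RESTAT Nov 2021.do" (section "Begin creating Tables 2,6 and 7")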
* =======================================
* ===== END OF TABLE 7 ==================
* =======================================
